3. Exploratory Plots

3.1 Correlation Matrix

import matplotlib.pyplot as plt
import seaborn as sns
# Every numeric indicator that goes into the correlation heatmap
# (also reused further down for chart tooltips and the repeated chart).
variables = [
    'MedHHInc',
    'TotalPop',
    'TotalPop16',
    'LabForTotal',
    'Unemployed',
    'PctBach',
    'PovertyRate',
    'UnemploymentRate',
    'LabForParticipationRate',
    'netexport',
    'REALGDP',
    'life_expectancy',
    'Labor_Productivity_2023',
    'REALGDPpercapita',
]

# Narrower subset kept around for later analysis steps.
selected_variables = [
    'REALGDPpercapita',
    'life_expectancy',
    'MedHHInc',
    'PctBach',
    'UnemploymentRate',
    'LabForParticipationRate',
    'Labor_Productivity_2023',
    'TotalPop',
    'PovertyRate',
    'netexport',
]

# Pairwise correlations between all columns listed in `variables`.
indicator_frame = us_rescaled_final[variables]
corr_matrix = indicator_frame.corr()

# Annotated heatmap; the colour scale is pinned to [-1, 1] so that the
# diverging palette is centred on zero correlation.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1, ax=ax)
ax.set_title('Correlation Matrix')
plt.show()

3.2 Repeated Chart and Bubble Plot

import altair as alt
# Interval selection ("brush"): points inside the dragged rectangle keep their
# per-name colour, everything else is greyed out.
brush = alt.selection_interval()

# Scatter-plot matrix: one 200x200 panel for every (row, column) pair of the
# columns in `variables`, all sharing the same brush.
repeated_chart = (
    alt.Chart(us_rescaled_final)
    .mark_circle()
    .encode(
        x=alt.X(alt.repeat("column"), type="quantitative", scale=alt.Scale(zero=False)),
        y=alt.Y(alt.repeat("row"), type="quantitative", scale=alt.Scale(zero=False)),
        # Conditional colour: by NAME_x when selected, light gray otherwise.
        color=alt.condition(brush, "NAME_x:N", alt.value("lightgray")),
        tooltip=['NAME_x'] + variables,
    )
    .properties(
        width=200,
        height=200,
    )
    .add_params(brush)
    .repeat(  # repeat variables across rows and columns
        row=variables,
        column=variables,
    )
)

# Render explicitly. A bare chart expression is only shown by the notebook
# when it is the last statement of a cell; with further statements following
# it, the chart would be built and then silently discarded.
repeated_chart.display()
# Columns offered by each of the three dropdowns (x axis, y axis, bubble size).
# Defined once so the three widgets cannot drift apart.
# NOTE(review): 'REALGDPpercapita' is part of `variables` but is not offered
# here -- confirm whether that omission is intentional.
dropdown_columns = [
    'MedHHInc',
    'TotalPop',
    'TotalPop16',
    'LabForTotal',
    'Unemployed',
    'PctBach',
    'PovertyRate',
    'UnemploymentRate',
    'LabForParticipationRate',
    'netexport',
    'REALGDP',
    'life_expectancy',
    'Labor_Productivity_2023',
]

# Select-box bindings for the x axis, the y axis and the bubble size.
dropdown_x = alt.binding_select(
    options=dropdown_columns,
    name='X-axis column '
)
dropdown_y = alt.binding_select(
    options=dropdown_columns,
    name='Y-axis column '
)
dropdown_size = alt.binding_select(
    options=dropdown_columns,
    name='Bubble Size '
)

# One Altair parameter per dropdown (x axis, y axis, bubble size). Each starts
# on 'MedHHInc' and is bound to its select box. The generator is consumed left
# to right, so the parameters are created in x, y, size order.
xcol_param, ycol_param, size_param = (
    alt.param(value='MedHHInc', bind=binding)
    for binding in (dropdown_x, dropdown_y, dropdown_size)
)

# Encoding channels read the calculated fields 'x', 'y' and 'size' defined
# below; titles are blanked because the plotted column changes with the
# dropdown selection.
bubble_x = alt.X('x:Q', scale=alt.Scale(zero=False, domain='unaggregated')).title('')
bubble_y = alt.Y('y:Q', scale=alt.Scale(zero=False, domain='unaggregated')).title('')
bubble_size = alt.Size('size:Q', scale=alt.Scale(zero=False, domain='unaggregated')).title('')

# Calculated fields: look up, for every row, the column whose name is the
# current value of the corresponding dropdown parameter.
calculated_fields = {
    'x': f'datum[{xcol_param.name}]',
    'y': f'datum[{ycol_param.name}]',
    'size': f'datum[{size_param.name}]',
}

# Interactive bubble plot driven by the three dropdowns.
chart2 = (
    alt.Chart(us_rescaled_final)
    .mark_circle()
    .encode(
        x=bubble_x,
        y=bubble_y,
        size=bubble_size,
        color='NAME_x:N',
        # NAME_x first, followed by every column in `variables`.
        tooltip=['NAME_x'] + variables,
    )
    .transform_calculate(**calculated_fields)
    .add_params(xcol_param, ycol_param, size_param)
    .properties(width=800, height=800)
)

chart2

3.3 Map

!pip install geopandas hvplot panel
import geopandas as gpd
import hvplot.pandas
import panel as pn
# Identifier columns that are carried through the reshape unchanged
# (including the 'geometry' column used for the map).
id_columns = [
    'STATEFP', 'STATENS', 'GEOIDFQ', 'GEOID', 'STUSPS', 'NAME_x',
    'LSAD', 'ALAND', 'AWATER', 'geometry', 'NAME_y', 'GEO_ID',
]

# Measure columns that are stacked into 'variable' / 'value' pairs.
value_columns = [
    'MedHHInc', 'EducTotal', 'EducBelowHighSch', 'EducHighSch', 'EducAssoc',
    'EducBach', 'TotalPop', 'TotalPop16', 'LabForTotal', 'Unemployed',
    'PopPovertyDetermined', 'PovertyPop', 'PctBach', 'PovertyRate',
    'UnemploymentRate', 'LabForParticipationRate', 'netexport', 'REALGDP',
    'life_expectancy', 'Labor_Productivity_2023', 'REALGDPpercapita',
]

# Wide -> long: one output row per (input row, measure column) combination.
us_rescaled_final_long = pd.melt(
    us_rescaled_final,
    id_vars=id_columns,
    value_vars=value_columns,
)

# Map coloured by 'value', with a widget to switch between the entries of
# the 'variable' column. dynamic=False asks hvplot to pre-compute every
# variable's plot instead of rendering on demand.
us_rescaled_final_long.hvplot(
    geo=True,
    c="value",
    cmap="viridis",
    groupby="variable",
    dynamic=False,
    width=1000,
    height=1000,
)